NOTE: The dataset, visualizations, and result outputs in this presentation are not representative of any type of business, user, or review on Yelp.
a. Pulling the data
# Businesses with a non-empty state, grouped by (state, stars).
# `tsum` is the row count after filtering; it is computed before grouping, so
# every row carries the same value.
dataGroupByStateStar <- ylpDataSmall %>%
  filter(state != "") %>%
  mutate(tsum = n()) %>%
  group_by(state, stars)

# One row per state: number of businesses, summed review counts, and the mean
# star rating rounded to two decimals. Re-grouping by state replaces the
# (state, stars) grouping above.
dataForTableByStateStar <- dataGroupByStateStar %>%
  group_by(state) %>%
  summarise(
    total_business = n(),
    total_reviews = sum(review_count),
    avg_rating = round(mean(stars), 2)
  )
b. Loading the data on the table
library(pander)
# Set pander's global "digits" option to 3, then render the per-state summary
# table.
panderOptions(o = "digits", value = 3)
pander(x = dataForTableByStateStar)
| state | total_business | total_reviews | avg_rating |
|---|---|---|---|
| CA | 4000 | 141119 | 3.69 |
| GA | 500 | 15455 | 3.58 |
| IL | 500 | 7188 | 3.51 |
| IN | 393 | 3041 | 3.71 |
| MA | 1298 | 54477 | 3.6 |
| MD | 500 | 5813 | 3.36 |
| MI | 500 | 11634 | 3.66 |
| NC | 500 | 8147 | 3.84 |
| NJ | 500 | 7904 | 3.37 |
| NY | 1382 | 23675 | 3.49 |
| ON | 228 | 1014 | 3.69 |
| PA | 1000 | 20078 | 3.55 |
| RI | 500 | 11086 | 3.64 |
| TX | 1000 | 23935 | 3.72 |
| VA | 189 | 1503 | 3.56 |
| WA | 500 | 17998 | 3.64 |
c. Loading the data on the Leaflet map
library(leaflet)
# Interactive map of average ratings: one circle per row of
# dataTotalAvgStarByState, placed at (city_lng, city_lat). The radius grows
# exponentially with the average rating and the fill colour comes from the
# myCol palette. The initial view is set by the fixed lng/lat/zoom below.
leaflet(dataTotalAvgStarByState) %>%
  addTiles() %>%
  setView(lng = -96.503906, lat = 38.68551, zoom = 4) %>%
  addCircles(
    lng = ~city_lng,
    lat = ~city_lat,
    weight = 0, # no circle outline
    radius = ~exp(totAvgRatingByState * 1.4) * 800,
    fillOpacity = 0.5,
    color = ~myCol(totAvgRatingByState),
    # Popup content is documented as character/HTML; the rating is numeric
    # (it is passed to exp() above), so convert it explicitly.
    popup = ~as.character(totAvgRatingByState)
  ) %>%
  addLegend(
    "bottomleft",
    pal = myCol,
    values = ~sort(totAvgRatingByState),
    title = "Avg.Ratings",
    labFormat = labelFormat(prefix = ""),
    opacity = 0.5
  )
a. Pulling the data
# Share of each star rating within a state, plus a bubble-size weight.
# summarise() collapses the (state, stars) groups into counts; the following
# mutate() runs on what remains of the grouping, so `total` is the sum of
# `totalByStar` within each remaining group (per state if the default
# drop-last-group behaviour applies).
dataWeightedGroupByStateStar <- dataGroupByStateStar %>%
  summarise(totalByStar = n()) %>%
  arrange(desc(stars)) %>%
  mutate(
    total = sum(totalByStar),
    percent = round((totalByStar / total) * 100, 1),
    # Custom column to weight the percent for size on the plot: larger shares
    # are exaggerated, shares below 5% collapse to a fixed weight of 1.
    percentWeight = case_when(
      percent >= 20 ~ percent * 2.5,
      percent >= 15 ~ percent * 1.2,
      percent >= 10 ~ percent,
      percent >= 5 ~ percent * 0.8,
      percent < 5 ~ 1
    )
  )
b. Loading the data on the ggplot bubble plot
library(ggplot2)
# Bubble grid of star ratings by state. Each bubble is sized by the weighted
# percentage and coloured by star value, with the raw percentage as its label.
# The size legend is suppressed and the remaining legend titles are blanked.
# NOTE(review): alpha = 0.05 sits inside aes(), so it is treated as a mapped
# aesthetic rather than a fixed 5% opacity -- confirm this is intended.
ggplot(dataWeightedGroupByStateStar, aes(state, stars, label = percent)) +
  geom_point(aes(size = percentWeight * 2, colour = stars, alpha = 0.05)) +
  geom_text(hjust = 0.4, size = 4) +
  scale_size(range = c(1, 30), guide = "none") +
  scale_colour_gradient(low = "darkblue", high = "red") +
  scale_y_continuous(breaks = seq(1, 5, by = 0.5)) +
  labs(
    title = "A grid of detailed avg.ratings by state ",
    x = "State",
    y = "Detailed Avg.Ratings"
  ) +
  theme(legend.title = element_blank())
a. Pulling the data
# Split users on the raw `elite` field: rows whose value is exactly "[]" go to
# the non-elite set, everything else to the elite set.
# NOTE(review): "[]" is presumably an empty list of elite years -- confirm
# against the source data.
ylpUserSmElite <- filter(ylpUserSm3, elite != "[]")
ylpUserSmNormal <- filter(ylpUserSm3, elite == "[]")
b. Loading the data on the box plot
* All users:
library(ggthemes)
# Yelp users in the boxplot: review_count against fans, one box per Fan_Size
# group, coloured by Fan_Size, legend hidden.
# Built with ggplot() + geom_boxplot() instead of qplot(), which ggplot2
# deprecated in 3.4.0; the mapping and layer are the same.
ggplot(
  ylpUserSm3,
  aes(x = fans, y = review_count, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Total review counts by the number of fans") +
  theme(legend.position = "none")
* Elite users:
# Elite Yelp group users in the boxplot: review_count against fans, one box
# per Fan_Size group, coloured by Fan_Size, legend hidden.
# Built with ggplot() + geom_boxplot() instead of qplot(), which ggplot2
# deprecated in 3.4.0; the mapping and layer are the same.
ggplot(
  ylpUserSmElite,
  aes(x = fans, y = review_count, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Total review counts by the number of fans (Elite users)") +
  theme(legend.position = "none")
* normal users:
# Non-elite Yelp group users in the boxplot: review_count against fans, one
# box per Fan_Size group, coloured by Fan_Size, legend hidden.
# Built with ggplot() + geom_boxplot() instead of qplot(), which ggplot2
# deprecated in 3.4.0; the mapping and layer are the same.
ggplot(
  ylpUserSmNormal,
  aes(x = fans, y = review_count, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Total review counts by the number of fans (Non-elite Users)") +
  theme(legend.position = "none")
c. Loading the data on the combination plots (point+smooth)
* All users:
# Yelp users in combination plots: review_count against fans as points
# coloured by fans, with smoothers on top.
# Built with ggplot() instead of qplot(), which ggplot2 deprecated in 3.4.0.
# geom_point() + geom_smooth() are the layers qplot(geom = c("point",
# "smooth")) added.
# NOTE(review): the styled stat_smooth() below draws a second smoother over
# the default one; kept so the rendering is unchanged -- confirm both are
# wanted.
ggplot(ylpUserSm1, aes(x = fans, y = review_count, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Total review counts by the number of fans") +
  scale_color_gradient(low = "darkblue", high = "darkred") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)
* elite user group:
# Elite Yelp group users in combination plots: review_count against fans as
# points coloured by fans, with smoothers on top.
# Built with ggplot() instead of qplot(), which ggplot2 deprecated in 3.4.0.
# geom_point() + geom_smooth() are the layers qplot(geom = c("point",
# "smooth")) added.
# NOTE(review): the styled stat_smooth() below draws a second smoother over
# the default one; kept so the rendering is unchanged -- confirm both are
# wanted.
ggplot(ylpUserSmElite, aes(x = fans, y = review_count, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Total review counts by the number of fans (Elite users)") +
  scale_color_gradient(low = "darkblue", high = "darkred") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)
* normal users:
# Non-elite Yelp group users in combination plots: review_count against fans
# as points coloured by fans, with smoothers on top.
# Built with ggplot() instead of qplot(), which ggplot2 deprecated in 3.4.0.
# geom_point() + geom_smooth() are the layers qplot(geom = c("point",
# "smooth")) added.
# NOTE(review): the styled stat_smooth() below draws a second smoother over
# the default one; kept so the rendering is unchanged -- confirm both are
# wanted.
ggplot(ylpUserSmNormal, aes(x = fans, y = review_count, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Total review counts by the number of fans (Non-elite users)") +
  scale_color_gradient(low = "darkblue", high = "darkred") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)
a. Loading the data on the box plot
* All users:
# Yelp users in the boxplot: average_stars against fans, one box per Fan_Size
# group, coloured by Fan_Size, legend hidden.
# Built with ggplot() + geom_boxplot() instead of qplot(), which ggplot2
# deprecated in 3.4.0; the mapping and layer are the same.
ggplot(
  ylpUserSm3,
  aes(x = fans, y = average_stars, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Average ratings by the number of fans") +
  theme(legend.position = "none")
* elite user group:
# Elite Yelp group users in the boxplot: average_stars against fans, one box
# per Fan_Size group, coloured by Fan_Size, legend hidden.
# Built with ggplot() + geom_boxplot() instead of qplot(), which ggplot2
# deprecated in 3.4.0; the mapping and layer are the same.
ggplot(
  ylpUserSmElite,
  aes(x = fans, y = average_stars, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Average ratings by the number of fans (Elite users)") +
  theme(legend.position = "none")
* normal users:
# Non-elite Yelp group users in the boxplot: average_stars against fans, one
# box per Fan_Size group, coloured by Fan_Size, legend hidden.
# Built with ggplot() + geom_boxplot() instead of qplot(), which ggplot2
# deprecated in 3.4.0; the mapping and layer are the same.
ggplot(
  ylpUserSmNormal,
  aes(x = fans, y = average_stars, group = Fan_Size, colour = Fan_Size)
) +
  geom_boxplot() +
  labs(title = "Average ratings by the number of fans (Non-elite users)") +
  theme(legend.position = "none")
b. Loading the data on the combination plots (point+smooth)
* All users:
# Yelp users in combination plots: average_stars against fans as points
# coloured by fans, with smoothers on top.
# Built with ggplot() instead of qplot(), which ggplot2 deprecated in 3.4.0.
# geom_point() + geom_smooth() are the layers qplot(geom = c("point",
# "smooth")) added.
# NOTE(review): the styled stat_smooth() below draws a second smoother over
# the default one; kept so the rendering is unchanged -- confirm both are
# wanted.
ggplot(ylpUserSm1, aes(x = fans, y = average_stars, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Average ratings by the number of fans") +
  scale_color_gradient(low = "darkblue", high = "darkred") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)
* elite user group:
# Elite Yelp group users in combination plots: average_stars against fans as
# points coloured by fans, with smoothers on top.
# Built with ggplot() instead of qplot(), which ggplot2 deprecated in 3.4.0.
# geom_point() + geom_smooth() are the layers qplot(geom = c("point",
# "smooth")) added.
# NOTE(review): the styled stat_smooth() below draws a second smoother over
# the default one; kept so the rendering is unchanged -- confirm both are
# wanted.
ggplot(ylpUserSmElite, aes(x = fans, y = average_stars, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Average ratings by the number of fans (Elite users)") +
  scale_color_gradient(low = "darkblue", high = "red") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)
* normal users:
# Non-elite Yelp group users in combination plots: average_stars against fans
# as points coloured by fans, with smoothers on top.
# Built with ggplot() instead of qplot(), which ggplot2 deprecated in 3.4.0.
# geom_point() + geom_smooth() are the layers qplot(geom = c("point",
# "smooth")) added.
# NOTE(review): the styled stat_smooth() below draws a second smoother over
# the default one; kept so the rendering is unchanged -- confirm both are
# wanted.
ggplot(ylpUserSmNormal, aes(x = fans, y = average_stars, colour = fans)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Average ratings by the number of fans (Non-elite users)") +
  scale_color_gradient(low = "darkblue", high = "red") +
  stat_smooth(fill = "green", colour = "cyan", size = 1, alpha = 0.1)